This is a pipeline to analyze single-cell RNA Seq data from GEO. In this script the single cell RNA-Seq results from this paper is regenerated: https://pubmed.ncbi.nlm.nih.gov/34956864/ Title: Bulk and Single-Cell Profiling of Breast Tumors Identifies TREM-1 as a Dominant Immune Suppressive Marker Associated With Poor Outcomes
library(Seurat)
library(tidyverse)
library(GEOquery)
file <- getGEOSuppFiles("GSE188600")
untar("GSE188600/GSE188600_RAW.tar", exdir = 'data/')
wd = getwd()
files = list.files(path = paste0(wd, '/data'), full.names = FALSE, recursive = FALSE)
mtx.cnts <- ReadMtx(mtx = paste0('data/', files[3]),
features = paste0('data/', files[2]),
cells = paste0('data/', files[1]))
Seurat.obj <- CreateSeuratObject(counts = mtx.cnts)
Seurat.obj
## An object of class Seurat
## 33694 features across 770 samples within 1 assay
## Active assay: RNA (33694 features, 0 variable features)
view(Seurat.obj@meta.data)
Seurat.obj$mito.prct <- PercentageFeatureSet(Seurat.obj, pattern = '^MT-')
VlnPlot(Seurat.obj, features = c("nFeature_RNA", "nCount_RNA", "mito.prct"), ncol = 3)
FeatureScatter(Seurat.obj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") +
geom_smooth(method = 'lm')
Seurat.obj.filt <- subset(Seurat.obj, subset = nCount_RNA >800 &
nFeature_RNA > 500 &
mito.prct < 10)
Seurat.obj.filt <- NormalizeData(object = Seurat.obj.filt)
Seurat.obj.filt <- FindVariableFeatures(object = Seurat.obj.filt, selection.method = 'vst', nfeatures = 2000)
top10 <- head(VariableFeatures(Seurat.obj.filt), 10)
plot1 <- VariableFeaturePlot(Seurat.obj.filt)
LabelPoints(plot = plot1, points = top10, repel = TRUE)
Seurat.obj.filt <- ScaleData(object = Seurat.obj.filt)
Seurat.obj.filt <- RunPCA(object = Seurat.obj.filt, features = VariableFeatures((Seurat.obj.filt)))
ElbowPlot(Seurat.obj.filt, ndims = 35)
Seurat.obj.filt <- FindNeighbors(object = Seurat.obj.filt, dim = 1:20)
Seurat.obj.filt <- RunUMAP(object = Seurat.obj.filt, dim = 1:20)
Seurat.obj.filt <- FindClusters(object = Seurat.obj.filt, resolution = c(0.01, 0.1, 0.3, 0.5, 0.8, 1, 1.2))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 736
## Number of edges: 21217
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9932
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 736
## Number of edges: 21217
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9618
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 736
## Number of edges: 21217
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9111
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 736
## Number of edges: 21217
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8790
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 736
## Number of edges: 21217
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8422
## Number of communities: 11
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 736
## Number of edges: 21217
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8190
## Number of communities: 11
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 736
## Number of edges: 21217
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7959
## Number of communities: 11
## Elapsed time: 0 seconds
DimPlot(Seurat.obj.filt, group.by = 'RNA_snn_res.0.5', label = TRUE)
Seurat.obj.filt <- RunUMAP(object = Seurat.obj.filt, dim = 1:20)
Seurat.obj.filt <- RunTSNE(object = Seurat.obj.filt, dims = 1:20)
Idents(Seurat.obj.filt) <- 'RNA_snn_res.0.5'
DimPlot(Seurat.obj.filt, reduction = 'umap', label = TRUE)
DimPlot(Seurat.obj.filt, reduction = 'tsne' , label = TRUE)
DefaultAssay(Seurat.obj.filt) # make sure it is RNA
## [1] "RNA"
Seurat.obj.filt.markers <- FindAllMarkers(Seurat.obj.filt, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
clust.markers <- Seurat.obj.filt.markers %>%
group_by(cluster) %>%
slice_max(n = 4, order_by = avg_log2FC)
DoHeatmap(Seurat.obj.filt, features = clust.markers$gene, size = 4,
angle = 90)
RidgePlot(Seurat.obj.filt, features = clust.markers$gene, ncol = 6)
VlnPlot(Seurat.obj.filt, features = clust.markers$gene, ncol = 4)
FeaturePlot(Seurat.obj.filt, features = clust.markers$gene, ncol = 4)
FeaturePlot(Seurat.obj.filt, features = c('CD163'), min.cutoff = 'q10')
features <- c("CST3", "CD86", "SPP1", "C1QA", "CTHRC1", "MGP", "CXCL13", "TNFRSF18", "JCHAIN", "IGKC",
"NKG7", "GNLY", "IL7R", "RPL34", "STMN1", "KIAA0101")
new.cluster.ids <- c("Mature DC", "DC", "Fibroblast", "T cell", "Immature DC",
"NK cells", "Naive T cell", "TNBC")
names(new.cluster.ids) <- levels(Seurat.obj.filt)
Seurat.obj.filt <- RenameIdents(Seurat.obj.filt, new.cluster.ids)
DimPlot(Seurat.obj.filt, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()
DimPlot(Seurat.obj.filt, reduction = "umap")
DoHeatmap(subset(Seurat.obj.filt, downsample = 700), features = features, size = 3)
# Feature plot - visualize feature expression in low-dimensional
space
FeaturePlot(Seurat.obj.filt, features = features, ncol = 4)
top.features <- Seurat.obj.filt.markers %>%
group_by(cluster) %>%
slice_max(n = 6, order_by = avg_log2FC)
DoHeatmap(subset(Seurat.obj.filt, downsample = 700), features = top.features$gene, size = 3)
DotPlot(Seurat.obj.filt, features = features) + RotatedAxis()